## Builds figures/Fig8.png from the per-district model results in results/.
# NOTE(review): this removes every (non-hidden) object from the environment
# the script is run in, so anything defined earlier in the session is lost.
rm(list=ls())
# Run a garbage collection after the wipe.
gc()


library(data.table)
library(ggplot2)
library(dplyr)
library(stringr)
library(scales)
library(tidyr)

# Select the result files that feed the figure. A path is kept when it
#   * mentions 'district' and 'current',
#   * mentions both years of one of the pairs 2008/2012, 2012/2016, 2016/2020,
#   * does not match 'NA.Rdata' or 'aspatial'.
files <- local({
  found <- list.files('results', full.names = TRUE)
  has <- function(pattern) grepl(pattern, found)

  spans_year_pair <-
    (has('2008') & has('2012')) |
    (has('2012') & has('2016')) |
    (has('2016') & has('2020'))

  keep <- has('district') &
    spans_year_pair &
    !has('NA.Rdata') &
    !has('aspatial') &
    has('current')

  found[keep]
})

# Read every selected .Rdata file and stack the coefficient tables of all
# objects whose name contains 'Model' into one long data.table.
#
# Each output row is one coefficient of one model. It carries the model-level
# statistics read from the model object (N, R-squared, adjusted R-squared,
# sigma, F statistic), a unified SE / SE.type pair, and file-level metadata
# (Year1, Year2, district, file).
results = rbindlist(lapply(files, FUN=function(file){

  # Load into a dedicated environment instead of this function's frame, so an
  # object stored in the file can never overwrite `file` or another local.
  loaded = new.env()
  load(file, envir=loaded)

  models = grep('Model', ls(loaded), value=TRUE)
  print(file)
  l = rbindlist(lapply(models, FUN=function(x){
    m = get(x, envir=loaded, inherits=FALSE)
    out = as.data.table(m$coefficient)
    out[,Covariate:=rownames(m$coefficient)]
    out[,Model:=x]
    out[,N:=m$N]
    out[,`R-Squared`:=m$r.squared]
    out[,`Adjusted R-Squared`:=m$adj.r.squared]
    out[,Sigma:=m$sigma]
    out[,FStat:=m$fstat]
    # Collapse the two possible standard-error columns into SE + SE.type.
    if('Cluster s.e.'%in%names(out)){
      out[,SE:=`Cluster s.e.`]
      out[,SE.type:='Cluster']
      out[,`Cluster s.e.`:=NULL]
    } else {
      # NOTE(review): 'Robust s.e' has no trailing dot, unlike 'Cluster s.e.'
      # -- confirm it matches the column name produced upstream.
      out[,SE:=`Robust s.e`]
      out[,SE.type:='Robust']
      out[,`Robust s.e`:=NULL]
    }

    return(out)
  }))

  # Election-year pair encoded in the file path. A '2020' match takes
  # precedence over a '2008' match; everything else is the 2012-2016 pair.
  if (grepl('2020', file)) {
    first_year = '2016'
    second_year = '2020'
  } else if (grepl('2008', file)) {
    first_year = '2008'
    second_year = '2012'
  } else {
    first_year = '2012'
    second_year = '2016'
  }
  l[,Year1:=first_year]
  l[,Year2:=second_year]

  # District id = file path minus the fixed prefix and the '.Rdata' suffix.
  # fixed() keeps both from being interpreted as regular expressions (the
  # unescaped '.' would otherwise match any character).
  path_prefix = paste0('results/current-results-', first_year, '-', second_year, '-district-')
  district_id = str_replace(str_replace(file, fixed(path_prefix), ''), fixed('.Rdata'), '')
  l[,district:=district_id]
  l[,file:=file]

  return(l)
}))

# Diagnostic: share of coefficient rows whose estimate is missing.
print(mean(is.na(results[['Estimate']])))

# Keep only the two exposure coefficients.
results <- results[Covariate %in% c('DemSpExpDiff_nohh','RepSpExpDiff_nohh')]

# Facet label, derived from the model name. The Republican pattern is tested
# first so that it wins when a name matches both patterns; names matching
# neither get NA.
results[, Exposure.Type := case_when(
  grepl('RepSpExp', Model) ~ 'Effect of\nexposure to\nRepublicans on\nRepublican\nregistration',
  grepl('DemSpExp', Model) ~ 'Effect of\nexposure to\nDemocrats on\nDemocratic\nregistration'
)]

# Subset label, also derived from the model name. When several patterns match,
# 'Oths' wins over 'Reps', which wins over 'Dems'.
results[, Subset := case_when(
  grepl('Oths', Model) ~ 'Non-partisans',
  grepl('Reps', Model) ~ 'Republicans',
  grepl('Dems', Model) ~ 'Democrats'
)]

# Period label, e.g. '2012-2016'.
results[, Years := paste(Year1, Year2, sep = '-')]




# Load House returns for the five study states and reduce them to one row per
# year x state x district holding the top two vote shares and their margin.
# NOTE(review): the file name suggests coverage ends in 2018, so the 2012:2020
# filter may match no 2020 rows -- confirm.
d = fread('1976-2018-house.csv')[year%in%2012:2020 & state %in% c('California','North Carolina','New York','Florida','Kansas')]

# Share of the contest's total votes on each input row.
d[,pct:=candidatevotes/totalvotes]


# Calculate the vote margin for each year / state / district.
#
# Sort so that, within each contest, rows are in descending order of vote
# share and the winner is the first row. na.last=TRUE is needed because
# setorderv() defaults to na.last=FALSE, which puts rows with a missing share
# first and would make such a row the "winner" with an NA margin.
# NOTE(review): shares are per input row; if one candidate can appear on
# several rows of a contest, the margin compares rows rather than candidates
# -- confirm against the data layout.
setorderv(d, c('year','state','district','pct'), c(1,-1,-1,-1), na.last=TRUE)

# margin is NA when a contest has fewer than two rows with a usable share.
# (`runerup.pct` keeps its original spelling so the column name is unchanged.)
d = d[,list(candidate=.N,
            margin = pct[1]-pct[2],
            winner = party[1],
            winning.pct=pct[1],
            runnerup = party[2],
            runerup.pct = pct[2]),by=c('year','state','district')]



# Create stacked data of margin summaries for 2012-2016 and 2016-2020.

# Summarise contest margins over a window of election years, returning one
# row per state x district.
#   house          data.table with columns year, state, district, margin, winner
#   election_years election years that belong to the window
#   period_label   value written to the `Years` column of the result
# Returns avg.margin, min.margin, the share of contests won by each major
# party (dem.control, rep.control), the number of contests (n) and the number
# with a missing margin (nas). min.margin is Inf (with a warning) when every
# margin in the window is missing.
summarise_margins = function(house, election_years, period_label){
  out = house[year %in% election_years][,list(
    avg.margin = mean(margin, na.rm=TRUE),
    min.margin = min(margin, na.rm=TRUE),
    dem.control = mean(winner=='democrat', na.rm=TRUE),
    rep.control = mean(winner=='republican', na.rm=TRUE),
    n = .N,
    nas = sum(is.na(margin))
  ),by=c('state','district')]
  out[,Years:=period_label]
  out[]
}

d1 = summarise_margins(d, 2012:2016, '2012-2016')
d2 = summarise_margins(d, 2016:2020, '2016-2020')

# Stack with rbindlist() so the result is a proper data.table and the `:=`
# below can update it by reference without a self-reference warning.
d = rbindlist(list(d1, d2))

# Recode district as state abbreviation + district value (e.g. 'CA' + 12).
d[,district:=case_when(state=='California'~paste0('CA',district),
                       state=='New York'~paste0('NY',district),
                       state=='Kansas'~paste0('KS',district),
                       state=='North Carolina'~paste0('NC',district),
                       state=='Florida'~paste0('FL',district)
)]


# Attach the district margin summaries to the estimates. This is a left join
# on period and district, so every estimate row is kept.
results <- merge(results, d, by = c('Years','district'), all.x = TRUE)

# A row is 'Uncompetitive' when the smallest margin in its period exceeds .2
# and one party won every contest in that period. Every other row is
# 'Competitive', including rows with no matching margin data (NA conditions
# never select a row).
results[, Type := 'Competitive']
results[min.margin > .2 & (dem.control == 1 | rep.control == 1), Type := 'Uncompetitive']


# Party palette shared by the fill and outline scales.
colors <- c(Democrats = "#377EB8", Republicans = "#E41A1C", `Non-partisans` = "purple")

library(grid)

# Figure 8: densities of the district-level estimates, weighted by N.
# Rows are the exposure type, columns are period x subset, and line type
# distinguishes competitive from uncompetitive districts.
g <- ggplot(
  data = results,
  mapping = aes(x = Estimate, weight = N, fill = Subset, color = Subset, linetype = Type)
) +
  geom_density(bw = .025, alpha = .2) +
  geom_vline(xintercept = 0, linetype = 'dashed') +
  facet_grid(Exposure.Type ~ Years + Subset) +
  scale_color_manual(values = colors) +
  scale_fill_manual(values = colors) +
  # Colour already follows the facet columns, so only the line-type legend is shown.
  guides(color = 'none', fill = 'none') +
  labs(x = 'District-level effect of partisan exposure', y = 'Density') +
  theme_bw() +
  theme(
    panel.spacing = unit(1, 'lines'),
    legend.position = 'bottom',
    legend.title = element_blank(),
    text = element_text(size = 15, family = 'serif'),
    strip.text.y = element_text(size = 12, angle = 0)
  )

ggsave(filename = 'figures/Fig8.png', plot = g, width = 11, height = 4, units = 'in', dpi = 300)

